Predicting Earthquakes Part 1¶
This study is ispired by the work published here Earthquake Prediction Using Python: A Geospatial Data Analysis Guide and designed with the help of
In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime
from sklearn.model_selection import GridSearchCV
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
Data Source¶
Data is sourced from the usgs and covers the seismic events over the previous 30 days starting with the date of analysis. For simplicity, only time, magnification, depth, and location data are retained.
In [5]:
from datetime import datetime
def scrape_earthquake_data():
url = 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.csv'
response = requests.get(url)
if response.status_code == 200:
data = pd.read_csv(url)
return data
else:
print(f"Failed to retrieve data. Status code: {response.status_code}")
return None
# Scrape earthquake data
usgsData = pd.DataFrame(scrape_earthquake_data())
# Display the first few rows of the dataset
if usgsData is not None:
earthquake_data = pd.DataFrame({'time': [datetime.strptime(d, "%Y-%m-%dT%H:%M:%S.%fZ") for d in usgsData.time],
'mag' : usgsData.mag,
'depth' : usgsData.depth,
'latitude' : usgsData.latitude,
'longitude' : usgsData.longitude,
'place' : usgsData.place})
else:
earthquake_data = "No earthquake data retrieved."
earthquake_data
Out[5]:
| time | mag | depth | latitude | longitude | place | |
|---|---|---|---|---|---|---|
| 0 | 2024-09-21 01:26:21.880 | 0.61 | 17.5000 | 33.697167 | -116.712833 | 5 km S of Idyllwild, CA |
| 1 | 2024-09-21 01:17:13.390 | 2.61 | 17.3600 | 33.691667 | -116.724833 | 5 km S of Idyllwild, CA |
| 2 | 2024-09-21 01:03:46.900 | 1.09 | 14.5100 | 33.962333 | -117.163667 | 7 km NE of Moreno Valley, CA |
| 3 | 2024-09-21 00:46:30.480 | 1.60 | 15.7600 | 33.996833 | -116.852667 | 8 km NNE of Banning, CA |
| 4 | 2024-09-21 00:42:30.752 | 1.80 | 4.4000 | 63.501600 | -151.131200 | 29 km E of Denali National Park, Alaska |
| ... | ... | ... | ... | ... | ... | ... |
| 9584 | 2024-08-22 02:02:22.680 | 0.92 | 10.5000 | 35.967833 | -120.521333 | 11 km NW of Parkfield, CA |
| 9585 | 2024-08-22 01:51:08.005 | 4.50 | 51.5590 | 31.142300 | 131.408400 | 41 km SSE of Kushima, Japan |
| 9586 | 2024-08-22 01:49:19.004 | 1.60 | 5.2283 | 31.682000 | -104.398000 | 54 km S of Whites City, New Mexico |
| 9587 | 2024-08-22 01:48:44.744 | 1.20 | 5.8948 | 31.683000 | -104.400000 | 54 km S of Whites City, New Mexico |
| 9588 | 2024-08-22 01:47:17.605 | 4.20 | 119.0840 | 1.856600 | 127.207400 | 90 km W of Tobelo, Indonesia |
9589 rows × 6 columns
The figure show the distribution of the seismic events by magnitude over the past 30 days.¶
We are particularly interested in predicting earthquakes with a magnitude of at least 5.
In [7]:
import matplotlib.pyplot as plt
import seaborn as sns
def plot_magnitude_distribution(df):
plt.figure(figsize=(14, 8))
# Create the histogram with KDE plot using blue hues
sns.histplot(df['mag'], bins=30, color='cornflowerblue', edgecolor='black', linewidth=1.2)
# Add a vertical line at the mean magnitude
mean_magnitude = df['mag'].mean()
plt.axvline(mean_magnitude, color='red', linestyle='--', linewidth=2)
plt.text(mean_magnitude + 0.1, plt.ylim()[1] * 0.9, f'Mean: {mean_magnitude:.2f}', color='red', fontsize=14, fontweight='bold')
# Add a vertical line at the median magnitude
median_magnitude = df['mag'].median()
plt.axvline(median_magnitude, color='green', linestyle='--', linewidth=2)
plt.text(median_magnitude + 0.1, plt.ylim()[1] * 0.8, f'Median: {median_magnitude:.2f}', color='green', fontsize=14, fontweight='bold')
# Customizing the title and labels
plt.title('Distribution of Earthquake Magnitudes', fontsize=20, fontweight='bold', color='navy')
plt.xlabel('mag', fontsize=16, fontweight='bold')
plt.ylabel('Frequency', fontsize=16, fontweight='bold')
# Customize the tick parameters
plt.xticks(fontsize=12, fontweight='bold')
plt.yticks(fontsize=12, fontweight='bold')
# Add custom grid
plt.grid(True, linestyle='--', alpha=0.6)
# Highlight certain ranges with a shaded area
plt.axvspan(5, np.ceil(df['mag'].max()), color='red', alpha=0.3, lw=0)
plt.text(6, plt.ylim()[1] * 0.95, 'Critical Range (>5)', color='navy', fontsize=14, fontweight='bold', ha='center')
# Adding annotations for the highest frequency bin
max_freq_bin = df['mag'].value_counts().idxmax()
max_freq_count = df['mag'].value_counts().max()
plt.annotate(f'Highest Frequency\n{max_freq_bin} Mag: {max_freq_count} times',
xy=(max_freq_bin, df['mag'].value_counts()[max_freq_bin]),
xytext=(max_freq_bin + 0.5, df['mag'].value_counts()[max_freq_bin] + 5),
arrowprops=dict(facecolor='black', shrink=0.05),
fontsize=14, fontweight='bold',
bbox=dict(boxstyle="round,pad=0.3", edgecolor="black", facecolor="lightgray", alpha=0.8))
# Show the plot
plt.show()
# Plot the enhanced magnitude distribution with blue hues
plot_magnitude_distribution(earthquake_data)
C:\Users\Lenovo\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
The Data Below shows the location of the Events over the past 30 days¶
In [9]:
import folium
from folium.plugins import MarkerCluster
def plot_enhanced_geo_map(df):
# Initialize the map centered around the mean coordinates
m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=2)
# Create a marker cluster
marker_cluster = MarkerCluster().add_to(m)
# Add points to the map with color coding and informative popups
for _, row in df.iterrows():
magnitude = row['mag']
if magnitude < 3.0:
color = 'blue'
elif 3.0 <= magnitude < 5.0:
color = 'green'
elif 5.0 <= magnitude < 7.0:
color = 'orange'
else:
color = 'red'
folium.CircleMarker(
location=[row['latitude'], row['longitude']],
radius=5,
popup=(
f"Location: {row['place']}<br>"
f"Time: {row['time']}<br>"
f"Magnitude: {row['mag']}<br>"
f"Depth: {row['depth']} km"
),
color=color,
fill=True,
fill_color=color,
fill_opacity=0.7
).add_to(marker_cluster)
return m
# Display the enhanced map
enhanced_earthquake_map = plot_enhanced_geo_map(earthquake_data)
#enhanced_earthquake_map.save("enhanced_earthquake_map.html")
enhanced_earthquake_map
Out[9]:
Make this Notebook Trusted to load map: File -> Trust Notebook